library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.1
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Work from the project folder so the relative file names used below resolve.
# NOTE(review): this absolute path is machine-specific; anyone else running the
# script has to change it first.
setwd("C:/Users/mecha_000/Desktop/DataScience/Python/DataVisualizations/project")

# Load the Prosper loan listings. Both the double and the single quote are
# declared as quoting characters for the parser.
loans <- read.csv("prosperLoanData.csv", sep = ",", quote = "\"'")

# Convert the listing creation column to Date, then derive the creation year
# as a number so listings can be grouped per year.
loans[["ListingCreationDate"]] <- as.Date(loans[["ListingCreationDate"]])
loans[["ListingCreationYear"]] <- as.numeric(
  format(loans[["ListingCreationDate"]], "%Y")
)

# Yearly roll-up of the loan listings: mean, median and total of the loan
# amount, delinquent amount, investor count, Prosper score, estimated loss and
# estimated return, plus the number of listings; one row per creation year,
# sorted by year.
#
# Several of these columns contain NA (AmountDelinquent, ProsperScore,
# EstimatedLoss and EstimatedReturn), so every aggregate drops missing values.
# Without na.rm = TRUE a single NA turns that year's statistic into NA.
# For a year in which a column has no values at all, the mean is NaN, the
# median is NA and the sum is 0.
# as.numeric() converts each column to double before aggregating.
loans.summary_by_year <-
  loans %>%
  group_by(ListingCreationYear) %>%
  summarise(
    mean_loanOriginalAmount =
      mean(as.numeric(LoanOriginalAmount), na.rm = TRUE),
    median_loanOriginalAmount =
      median(as.numeric(LoanOriginalAmount), na.rm = TRUE),
    sum_loanOriginalAmount =
      sum(as.numeric(LoanOriginalAmount), na.rm = TRUE),

    mean_amountDelinquent =
      mean(as.numeric(AmountDelinquent), na.rm = TRUE),
    median_amountDelinquent =
      median(as.numeric(AmountDelinquent), na.rm = TRUE),
    sum_amountDelinquent =
      sum(as.numeric(AmountDelinquent), na.rm = TRUE),

    mean_investors = mean(as.numeric(Investors), na.rm = TRUE),
    median_investors = median(as.numeric(Investors), na.rm = TRUE),
    sum_investors = sum(as.numeric(Investors), na.rm = TRUE),

    mean_prosperScore = mean(as.numeric(ProsperScore), na.rm = TRUE),
    median_prosperScore = median(as.numeric(ProsperScore), na.rm = TRUE),
    sum_prosperScore = sum(as.numeric(ProsperScore), na.rm = TRUE),

    mean_estimatedLoss = mean(as.numeric(EstimatedLoss), na.rm = TRUE),
    median_estimatedLoss = median(as.numeric(EstimatedLoss), na.rm = TRUE),
    sum_estimatedLoss = sum(as.numeric(EstimatedLoss), na.rm = TRUE),

    mean_estimatedReturn = mean(as.numeric(EstimatedReturn), na.rm = TRUE),
    median_estimatedReturn =
      median(as.numeric(EstimatedReturn), na.rm = TRUE),
    sum_estimatedReturn = sum(as.numeric(EstimatedReturn), na.rm = TRUE),

    # Number of listings created in the year (missing values included).
    count = n()
  ) %>%
  arrange(ListingCreationYear)


# Save the per-year summary as CSV in the current working directory.
write.csv(
  x = loans.summary_by_year,
  file = "prosperLoanData_SummaryByYear.csv"
)

# Print per-column descriptive statistics for the full loan data set.
summary(object = loans)
##                    ListingKey     ListingNumber     ListingCreationDate 
##  17A93590655669644DB4C06:     6   Min.   :      4   Min.   :2005-11-09  
##  349D3587495831350F0F648:     4   1st Qu.: 400919   1st Qu.:2008-09-19  
##  47C1359638497431975670B:     4   Median : 600554   Median :2012-06-16  
##  8474358854651984137201C:     4   Mean   : 627886   Mean   :2011-07-08  
##  DE8535960513435199406CE:     4   3rd Qu.: 892634   3rd Qu.:2013-09-09  
##  04C13599434217079754AEE:     3   Max.   :1255725   Max.   :2014-03-10  
##  (Other)                :113912                                         
##   CreditGrade         Term                       LoanStatus   
##         :84984   Min.   :12.00   Current              :56576  
##  C      : 5649   1st Qu.:36.00   Completed            :38074  
##  D      : 5153   Median :36.00   Chargedoff           :11992  
##  B      : 4389   Mean   :40.83   Defaulted            : 5018  
##  AA     : 3509   3rd Qu.:36.00   Past Due (1-15 days) :  806  
##  HR     : 3508   Max.   :60.00   Past Due (31-60 days):  363  
##  (Other): 6745                   (Other)              : 1108  
##                ClosedDate     BorrowerAPR       BorrowerRate   
##                     :58848   Min.   :0.00653   Min.   :0.0000  
##  2014-03-04 00:00:00:  105   1st Qu.:0.15629   1st Qu.:0.1340  
##  2014-02-19 00:00:00:  100   Median :0.20976   Median :0.1840  
##  2014-02-11 00:00:00:   92   Mean   :0.21883   Mean   :0.1928  
##  2012-10-30 00:00:00:   81   3rd Qu.:0.28381   3rd Qu.:0.2500  
##  2013-02-26 00:00:00:   78   Max.   :0.51229   Max.   :0.4975  
##  (Other)            :54633   NA's   :25                        
##   LenderYield      EstimatedEffectiveYield EstimatedLoss  
##  Min.   :-0.0100   Min.   :-0.183          Min.   :0.005  
##  1st Qu.: 0.1242   1st Qu.: 0.116          1st Qu.:0.042  
##  Median : 0.1730   Median : 0.162          Median :0.072  
##  Mean   : 0.1827   Mean   : 0.169          Mean   :0.080  
##  3rd Qu.: 0.2400   3rd Qu.: 0.224          3rd Qu.:0.112  
##  Max.   : 0.4925   Max.   : 0.320          Max.   :0.366  
##                    NA's   :29084           NA's   :29084  
##  EstimatedReturn  ProsperRating..numeric. ProsperRating..Alpha.
##  Min.   :-0.183   Min.   :1.000                  :29084        
##  1st Qu.: 0.074   1st Qu.:3.000           C      :18345        
##  Median : 0.092   Median :4.000           B      :15581        
##  Mean   : 0.096   Mean   :4.072           A      :14551        
##  3rd Qu.: 0.117   3rd Qu.:5.000           D      :14274        
##  Max.   : 0.284   Max.   :7.000           E      : 9795        
##  NA's   :29084    NA's   :29084           (Other):12307        
##   ProsperScore   ListingCategory..numeric. BorrowerState  
##  Min.   : 1.00   Min.   : 0.000            CA     :14717  
##  1st Qu.: 4.00   1st Qu.: 1.000            TX     : 6842  
##  Median : 6.00   Median : 1.000            NY     : 6729  
##  Mean   : 5.95   Mean   : 2.774            FL     : 6720  
##  3rd Qu.: 8.00   3rd Qu.: 3.000            IL     : 5921  
##  Max.   :11.00   Max.   :20.000                   : 5515  
##  NA's   :29084                             (Other):67493  
##                     Occupation         EmploymentStatus
##  Other                   :28617   Employed     :67322  
##  Professional            :13628   Full-time    :26355  
##  Computer Programmer     : 4478   Self-employed: 6134  
##  Executive               : 4311   Not available: 5347  
##  Teacher                 : 3759   Other        : 3806  
##  Administrative Assistant: 3688                : 2255  
##  (Other)                 :55456   (Other)      : 2718  
##  EmploymentStatusDuration IsBorrowerHomeowner CurrentlyInGroup
##  Min.   :  0.00           False:56459         False:101218    
##  1st Qu.: 26.00           True :57478         True : 12719    
##  Median : 67.00                                               
##  Mean   : 96.07                                               
##  3rd Qu.:137.00                                               
##  Max.   :755.00                                               
##  NA's   :7625                                                 
##                     GroupKey                 DateCreditPulled 
##                         :100596   2013-12-23 09:38:12:     6  
##  783C3371218786870A73D20:  1140   2013-11-21 09:09:41:     4  
##  3D4D3366260257624AB272D:   916   2013-12-06 05:43:16:     4  
##  6A3B336601725506917317E:   698   2014-01-14 20:17:49:     4  
##  FEF83377364176536637E50:   611   2014-02-09 12:14:41:     4  
##  C9643379247860156A00EC0:   342   2013-09-27 22:04:54:     3  
##  (Other)                :  9634   (Other)            :113912  
##  CreditScoreRangeLower CreditScoreRangeUpper
##  Min.   :  0.0         Min.   : 19.0        
##  1st Qu.:660.0         1st Qu.:679.0        
##  Median :680.0         Median :699.0        
##  Mean   :685.6         Mean   :704.6        
##  3rd Qu.:720.0         3rd Qu.:739.0        
##  Max.   :880.0         Max.   :899.0        
##  NA's   :591           NA's   :591          
##         FirstRecordedCreditLine CurrentCreditLines OpenCreditLines
##                     :   697     Min.   : 0.00      Min.   : 0.00  
##  1993-12-01 00:00:00:   185     1st Qu.: 7.00      1st Qu.: 6.00  
##  1994-11-01 00:00:00:   178     Median :10.00      Median : 9.00  
##  1995-11-01 00:00:00:   168     Mean   :10.32      Mean   : 9.26  
##  1990-04-01 00:00:00:   161     3rd Qu.:13.00      3rd Qu.:12.00  
##  1995-03-01 00:00:00:   159     Max.   :59.00      Max.   :54.00  
##  (Other)            :112389     NA's   :7604       NA's   :7604   
##  TotalCreditLinespast7years OpenRevolvingAccounts
##  Min.   :  2.00             Min.   : 0.00        
##  1st Qu.: 17.00             1st Qu.: 4.00        
##  Median : 25.00             Median : 6.00        
##  Mean   : 26.75             Mean   : 6.97        
##  3rd Qu.: 35.00             3rd Qu.: 9.00        
##  Max.   :136.00             Max.   :51.00        
##  NA's   :697                                     
##  OpenRevolvingMonthlyPayment InquiriesLast6Months TotalInquiries   
##  Min.   :    0.0             Min.   :  0.000      Min.   :  0.000  
##  1st Qu.:  114.0             1st Qu.:  0.000      1st Qu.:  2.000  
##  Median :  271.0             Median :  1.000      Median :  4.000  
##  Mean   :  398.3             Mean   :  1.435      Mean   :  5.584  
##  3rd Qu.:  525.0             3rd Qu.:  2.000      3rd Qu.:  7.000  
##  Max.   :14985.0             Max.   :105.000      Max.   :379.000  
##                              NA's   :697          NA's   :1159     
##  CurrentDelinquencies AmountDelinquent   DelinquenciesLast7Years
##  Min.   : 0.0000      Min.   :     0.0   Min.   : 0.000         
##  1st Qu.: 0.0000      1st Qu.:     0.0   1st Qu.: 0.000         
##  Median : 0.0000      Median :     0.0   Median : 0.000         
##  Mean   : 0.5921      Mean   :   984.5   Mean   : 4.155         
##  3rd Qu.: 0.0000      3rd Qu.:     0.0   3rd Qu.: 3.000         
##  Max.   :83.0000      Max.   :463881.0   Max.   :99.000         
##  NA's   :697          NA's   :7622       NA's   :990            
##  PublicRecordsLast10Years PublicRecordsLast12Months RevolvingCreditBalance
##  Min.   : 0.0000          Min.   : 0.000            Min.   :      0       
##  1st Qu.: 0.0000          1st Qu.: 0.000            1st Qu.:   3121       
##  Median : 0.0000          Median : 0.000            Median :   8549       
##  Mean   : 0.3126          Mean   : 0.015            Mean   :  17599       
##  3rd Qu.: 0.0000          3rd Qu.: 0.000            3rd Qu.:  19521       
##  Max.   :38.0000          Max.   :20.000            Max.   :1435667       
##  NA's   :697              NA's   :7604              NA's   :7604          
##  BankcardUtilization AvailableBankcardCredit  TotalTrades    
##  Min.   :0.000       Min.   :     0          Min.   :  0.00  
##  1st Qu.:0.310       1st Qu.:   880          1st Qu.: 15.00  
##  Median :0.600       Median :  4100          Median : 22.00  
##  Mean   :0.561       Mean   : 11210          Mean   : 23.23  
##  3rd Qu.:0.840       3rd Qu.: 13180          3rd Qu.: 30.00  
##  Max.   :5.950       Max.   :646285          Max.   :126.00  
##  NA's   :7604        NA's   :7544            NA's   :7544    
##  TradesNeverDelinquent..percentage. TradesOpenedLast6Months
##  Min.   :0.000                      Min.   : 0.000         
##  1st Qu.:0.820                      1st Qu.: 0.000         
##  Median :0.940                      Median : 0.000         
##  Mean   :0.886                      Mean   : 0.802         
##  3rd Qu.:1.000                      3rd Qu.: 1.000         
##  Max.   :1.000                      Max.   :20.000         
##  NA's   :7544                       NA's   :7544           
##  DebtToIncomeRatio         IncomeRange    IncomeVerifiable
##  Min.   : 0.000    $25,000-49,999:32192   False:  8669    
##  1st Qu.: 0.140    $50,000-74,999:31050   True :105268    
##  Median : 0.220    $100,000+     :17337                   
##  Mean   : 0.276    $75,000-99,999:16916                   
##  3rd Qu.: 0.320    Not displayed : 7741                   
##  Max.   :10.010    $1-24,999     : 7274                   
##  NA's   :8554      (Other)       : 1427                   
##  StatedMonthlyIncome                    LoanKey       TotalProsperLoans
##  Min.   :      0     CB1B37030986463208432A1:     6   Min.   :0.00     
##  1st Qu.:   3200     2DEE3698211017519D7333F:     4   1st Qu.:1.00     
##  Median :   4667     9F4B37043517554537C364C:     4   Median :1.00     
##  Mean   :   5608     D895370150591392337ED6D:     4   Mean   :1.42     
##  3rd Qu.:   6825     E6FB37073953690388BC56D:     4   3rd Qu.:2.00     
##  Max.   :1750003     0D8F37036734373301ED419:     3   Max.   :8.00     
##                      (Other)                :113912   NA's   :91852    
##  TotalProsperPaymentsBilled OnTimeProsperPayments
##  Min.   :  0.00             Min.   :  0.00       
##  1st Qu.:  9.00             1st Qu.:  9.00       
##  Median : 16.00             Median : 15.00       
##  Mean   : 22.93             Mean   : 22.27       
##  3rd Qu.: 33.00             3rd Qu.: 32.00       
##  Max.   :141.00             Max.   :141.00       
##  NA's   :91852              NA's   :91852        
##  ProsperPaymentsLessThanOneMonthLate ProsperPaymentsOneMonthPlusLate
##  Min.   : 0.00                       Min.   : 0.00                  
##  1st Qu.: 0.00                       1st Qu.: 0.00                  
##  Median : 0.00                       Median : 0.00                  
##  Mean   : 0.61                       Mean   : 0.05                  
##  3rd Qu.: 0.00                       3rd Qu.: 0.00                  
##  Max.   :42.00                       Max.   :21.00                  
##  NA's   :91852                       NA's   :91852                  
##  ProsperPrincipalBorrowed ProsperPrincipalOutstanding
##  Min.   :    0            Min.   :    0              
##  1st Qu.: 3500            1st Qu.:    0              
##  Median : 6000            Median : 1627              
##  Mean   : 8472            Mean   : 2930              
##  3rd Qu.:11000            3rd Qu.: 4127              
##  Max.   :72499            Max.   :23451              
##  NA's   :91852            NA's   :91852              
##  ScorexChangeAtTimeOfListing LoanCurrentDaysDelinquent
##  Min.   :-209.00             Min.   :   0.0           
##  1st Qu.: -35.00             1st Qu.:   0.0           
##  Median :  -3.00             Median :   0.0           
##  Mean   :  -3.22             Mean   : 152.8           
##  3rd Qu.:  25.00             3rd Qu.:   0.0           
##  Max.   : 286.00             Max.   :2704.0           
##  NA's   :95009                                        
##  LoanFirstDefaultedCycleNumber LoanMonthsSinceOrigination   LoanNumber    
##  Min.   : 0.00                 Min.   :  0.0              Min.   :     1  
##  1st Qu.: 9.00                 1st Qu.:  6.0              1st Qu.: 37332  
##  Median :14.00                 Median : 21.0              Median : 68599  
##  Mean   :16.27                 Mean   : 31.9              Mean   : 69444  
##  3rd Qu.:22.00                 3rd Qu.: 65.0              3rd Qu.:101901  
##  Max.   :44.00                 Max.   :100.0              Max.   :136486  
##  NA's   :96985                                                            
##  LoanOriginalAmount          LoanOriginationDate LoanOriginationQuarter
##  Min.   : 1000      2014-01-22 00:00:00:   491   Q4 2013:14450         
##  1st Qu.: 4000      2013-11-13 00:00:00:   490   Q1 2014:12172         
##  Median : 6500      2014-02-19 00:00:00:   439   Q3 2013: 9180         
##  Mean   : 8337      2013-10-16 00:00:00:   434   Q2 2013: 7099         
##  3rd Qu.:12000      2014-01-28 00:00:00:   339   Q3 2012: 5632         
##  Max.   :35000      2013-09-24 00:00:00:   316   Q2 2012: 5061         
##                     (Other)            :111428   (Other):60343         
##                    MemberKey      MonthlyLoanPayment LP_CustomerPayments
##  63CA34120866140639431C9:     9   Min.   :   0.0     Min.   :   -2.35   
##  16083364744933457E57FB9:     8   1st Qu.: 131.6     1st Qu.: 1005.76   
##  3A2F3380477699707C81385:     8   Median : 217.7     Median : 2583.83   
##  4D9C3403302047712AD0CDD:     8   Mean   : 272.5     Mean   : 4183.08   
##  739C338135235294782AE75:     8   3rd Qu.: 371.6     3rd Qu.: 5548.40   
##  7E1733653050264822FAA3D:     8   Max.   :2251.5     Max.   :40702.39   
##  (Other)                :113888                                         
##  LP_CustomerPrincipalPayments LP_InterestandFees LP_ServiceFees   
##  Min.   :    0.0              Min.   :   -2.35   Min.   :-664.87  
##  1st Qu.:  500.9              1st Qu.:  274.87   1st Qu.: -73.18  
##  Median : 1587.5              Median :  700.84   Median : -34.44  
##  Mean   : 3105.5              Mean   : 1077.54   Mean   : -54.73  
##  3rd Qu.: 4000.0              3rd Qu.: 1458.54   3rd Qu.: -13.92  
##  Max.   :35000.0              Max.   :15617.03   Max.   :  32.06  
##                                                                   
##  LP_CollectionFees  LP_GrossPrincipalLoss LP_NetPrincipalLoss
##  Min.   :-9274.75   Min.   :  -94.2       Min.   : -954.5    
##  1st Qu.:    0.00   1st Qu.:    0.0       1st Qu.:    0.0    
##  Median :    0.00   Median :    0.0       Median :    0.0    
##  Mean   :  -14.24   Mean   :  700.4       Mean   :  681.4    
##  3rd Qu.:    0.00   3rd Qu.:    0.0       3rd Qu.:    0.0    
##  Max.   :    0.00   Max.   :25000.0       Max.   :25000.0    
##                                                              
##  LP_NonPrincipalRecoverypayments PercentFunded    Recommendations   
##  Min.   :    0.00                Min.   :0.7000   Min.   : 0.00000  
##  1st Qu.:    0.00                1st Qu.:1.0000   1st Qu.: 0.00000  
##  Median :    0.00                Median :1.0000   Median : 0.00000  
##  Mean   :   25.14                Mean   :0.9986   Mean   : 0.04803  
##  3rd Qu.:    0.00                3rd Qu.:1.0000   3rd Qu.: 0.00000  
##  Max.   :21117.90                Max.   :1.0125   Max.   :39.00000  
##                                                                     
##  InvestmentFromFriendsCount InvestmentFromFriendsAmount   Investors      
##  Min.   : 0.00000           Min.   :    0.00            Min.   :   1.00  
##  1st Qu.: 0.00000           1st Qu.:    0.00            1st Qu.:   2.00  
##  Median : 0.00000           Median :    0.00            Median :  44.00  
##  Mean   : 0.02346           Mean   :   16.55            Mean   :  80.48  
##  3rd Qu.: 0.00000           3rd Qu.:    0.00            3rd Qu.: 115.00  
##  Max.   :33.00000           Max.   :25000.00            Max.   :1189.00  
##                                                                          
##  ListingCreationYear
##  Min.   :2005       
##  1st Qu.:2008       
##  Median :2012       
##  Mean   :2011       
##  3rd Qu.:2013       
##  Max.   :2014       
## 
# Single-variable distributions of the loan data. Numeric columns are binned
# with geom_histogram(); categorical columns are counted per level with
# geom_bar().

# Loan amount, in bins of 500.
ggplot(loans, aes(x = LoanOriginalAmount)) +
  geom_histogram(binwidth = 500)

# Listing creation date. No bin width is given, so ggplot2 picks its default
# and reports that choice in a message.
ggplot(loans, aes(x = ListingCreationDate)) +
  geom_histogram()

# Delinquent amount, in bins of 500.
ggplot(loans, aes(x = AmountDelinquent)) +
  geom_histogram(binwidth = 500)

# Number of loans per occupation.
ggplot(loans, aes(x = Occupation)) +
  geom_bar()

# Number of loans per borrower state.
ggplot(loans, aes(x = BorrowerState)) +
  geom_bar()

# Number of loans per income range.
ggplot(loans, aes(x = IncomeRange)) +
  geom_bar()

# Number of investors per loan, in bins of 10.
ggplot(loans, aes(x = Investors)) +
  geom_histogram(binwidth = 10)

# Lower bound of the credit score range, in bins of 10.
ggplot(loans, aes(x = CreditScoreRangeLower)) +
  geom_histogram(binwidth = 10)

# Upper bound of the credit score range, in bins of 10.
ggplot(loans, aes(x = CreditScoreRangeUpper)) +
  geom_histogram(binwidth = 10)

# Prosper risk score, one bin per score value.
ggplot(loans, aes(x = ProsperScore)) +
  geom_histogram(binwidth = 1)

# Estimated loss, in bins of 0.01.
ggplot(loans, aes(x = EstimatedLoss)) +
  geom_histogram(binwidth = .01)

# Estimated return, in bins of 0.01.
ggplot(loans, aes(x = EstimatedReturn)) +
  geom_histogram(binwidth = .01)

# Loan amount distribution (bins of 1000), one panel per borrower state.
ggplot(loans, aes(x = LoanOriginalAmount)) +
  geom_histogram(binwidth = 1000) +
  facet_wrap(~ BorrowerState) +
  ggtitle("Distribution of loan amounts per state") +
  xlab("Loan amount") +
  ylab("Number of loans")

# Loan amount distribution (bins of 1000), one panel per occupation.
ggplot(loans, aes(x = LoanOriginalAmount)) +
  geom_histogram(binwidth = 1000) +
  facet_wrap(~ Occupation) +
  ggtitle("Distribution of loan amounts per occupation") +
  xlab("Loan amount") +
  ylab("Number of loans")

# Spread of loan amounts within each income range, one box per range.
ggplot(loans, aes(IncomeRange, LoanOriginalAmount)) +
  geom_boxplot() +
  ggtitle("Income to loan amount") +
  xlab("Income") +
  ylab("Loan amount")

# Box plot of loan amounts per occupation, one box per occupation.
# The title and x-axis label name the variable that is actually plotted
# (Occupation).
ggplot(
  aes(x = Occupation, y = LoanOriginalAmount),
  data = loans) +
  geom_boxplot() +
  labs(
    title = "Occupation to loan amount",
    x = "Occupation", y = "Loan amount")

# Box plot of loan amounts per borrower state, one box per state.
# The title and x-axis label name the variable that is actually plotted
# (BorrowerState).
ggplot(
  aes(x = BorrowerState, y = LoanOriginalAmount),
  data = loans) +
  geom_boxplot() +
  labs(
    title = "State to loan amount",
    x = "State", y = "Loan amount")

###


# Monthly payment (x) against loan amount (y), with points colored by the
# estimated return. The first plot shows all loans together; the following
# ones split the same plot into panels by a grouping column.
# All loans are plotted, so the data frame is passed directly rather than
# through a subset() call with an empty filter.

# Scatter plot of monthly payments and loan amounts, colored by estimated
# return.
ggplot(
  aes(x = MonthlyLoanPayment, y = LoanOriginalAmount),
  data = loans
) +
  geom_point(aes(color = EstimatedReturn)) +
  labs(
    title = "Monthly payment to loan amount",
    x = "Monthly payment", y = "Loan amount")

# Scatter plot of monthly payments and loan amounts, colored by estimated
# return and faceted by Prosper's risk score.
ggplot(
  aes(x = MonthlyLoanPayment, y = LoanOriginalAmount),
  data = loans
) +
  geom_point(aes(color = EstimatedReturn)) +
  facet_wrap(~ ProsperScore) +
  labs(
    title = "Monthly payment to loan amount",
    x = "Monthly payment", y = "Loan amount")

# Scatter plot of monthly payments and loan amounts, colored by estimated
# return and faceted by occupation.
ggplot(
  aes(x = MonthlyLoanPayment, y = LoanOriginalAmount),
  data = loans
) +
  geom_point(aes(color = EstimatedReturn)) +
  facet_wrap(~ Occupation) +
  labs(
    title = "Monthly payment to loan amount",
    x = "Monthly payment", y = "Loan amount")

# Scatter plot of monthly payments and loan amounts, colored by estimated
# return and faceted by state.
ggplot(
  aes(x = MonthlyLoanPayment, y = LoanOriginalAmount),
  data = loans
) +
  geom_point(aes(color = EstimatedReturn)) +
  facet_wrap(~ BorrowerState) +
  labs(
    title = "Monthly payment to loan amount",
    x = "Monthly payment", y = "Loan amount")

# Scatter plot of monthly payments and loan amounts, colored by estimated
# return and faceted by income range.
ggplot(
  aes(x = MonthlyLoanPayment, y = LoanOriginalAmount),
  data = loans
) +
  geom_point(aes(color = EstimatedReturn)) +
  facet_wrap(~ IncomeRange) +
  labs(
    title = "Monthly payment to loan amount",
    x = "Monthly payment", y = "Loan amount")

###


# Estimated loss (x) against estimated return (y) for all loans, with points
# colored by the original loan amount.
ggplot(loans, aes(EstimatedLoss, EstimatedReturn)) +
  geom_point(aes(colour = LoanOriginalAmount)) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 29084 rows containing missing values (geom_point).

# Estimated loss (x) against estimated return (y), colored by the original
# loan amount, with one panel per occupation.
ggplot(loans, aes(EstimatedLoss, EstimatedReturn)) +
  geom_point(aes(colour = LoanOriginalAmount)) +
  facet_wrap(~ Occupation) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 2255 rows containing missing values (geom_point).
## Warning: Removed 659 rows containing missing values (geom_point).
## Warning: Removed 980 rows containing missing values (geom_point).
## Warning: Removed 867 rows containing missing values (geom_point).
## Warning: Removed 64 rows containing missing values (geom_point).
## Warning: Removed 180 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 66 rows containing missing values (geom_point).
## Warning: Removed 37 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 318 rows containing missing values (geom_point).
## Warning: Removed 39 rows containing missing values (geom_point).
## Warning: Removed 1048 rows containing missing values (geom_point).
## Warning: Removed 1242 rows containing missing values (geom_point).
## Warning: Removed 464 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 49 rows containing missing values (geom_point).
## Warning: Removed 225 rows containing missing values (geom_point).
## Warning: Removed 271 rows containing missing values (geom_point).
## Warning: Removed 843 rows containing missing values (geom_point).
## Warning: Removed 103 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 286 rows containing missing values (geom_point).
## Warning: Removed 234 rows containing missing values (geom_point).
## Warning: Removed 63 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 378 rows containing missing values (geom_point).
## Warning: Removed 64 rows containing missing values (geom_point).
## Warning: Removed 226 rows containing missing values (geom_point).
## Warning: Removed 448 rows containing missing values (geom_point).
## Warning: Removed 94 rows containing missing values (geom_point).
## Warning: Removed 109 rows containing missing values (geom_point).
## Warning: Removed 76 rows containing missing values (geom_point).
## Warning: Removed 330 rows containing missing values (geom_point).
## Warning: Removed 7300 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 301 rows containing missing values (geom_point).
## Warning: Removed 140 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_point).
## Warning: Removed 3086 rows containing missing values (geom_point).
## Warning: Removed 105 rows containing missing values (geom_point).
## Warning: Removed 27 rows containing missing values (geom_point).
## Warning: Removed 291 rows containing missing values (geom_point).
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 601 rows containing missing values (geom_point).
## Warning: Removed 1096 rows containing missing values (geom_point).
## Warning: Removed 768 rows containing missing values (geom_point).
## Warning: Removed 80 rows containing missing values (geom_point).
## Warning: Removed 566 rows containing missing values (geom_point).
## Warning: Removed 166 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 133 rows containing missing values (geom_point).
## Warning: Removed 85 rows containing missing values (geom_point).
## Warning: Removed 118 rows containing missing values (geom_point).
## Warning: Removed 53 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 871 rows containing missing values (geom_point).
## Warning: Removed 76 rows containing missing values (geom_point).
## Warning: Removed 35 rows containing missing values (geom_point).
## Warning: Removed 92 rows containing missing values (geom_point).
## Warning: Removed 154 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 309 rows containing missing values (geom_point).
## Warning: Removed 142 rows containing missing values (geom_point).

# Estimated loss (x) against estimated return (y), colored by the original
# loan amount, with one panel per borrower state.
ggplot(loans, aes(EstimatedLoss, EstimatedReturn)) +
  geom_point(aes(colour = LoanOriginalAmount)) +
  facet_wrap(~ BorrowerState) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 5515 rows containing missing values (geom_point).
## Warning: Removed 33 rows containing missing values (geom_point).
## Warning: Removed 492 rows containing missing values (geom_point).
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 544 rows containing missing values (geom_point).
## Warning: Removed 3956 rows containing missing values (geom_point).
## Warning: Removed 481 rows containing missing values (geom_point).
## Warning: Removed 135 rows containing missing values (geom_point).
## Warning: Removed 54 rows containing missing values (geom_point).
## Warning: Removed 33 rows containing missing values (geom_point).
## Warning: Removed 1314 rows containing missing values (geom_point).
## Warning: Removed 1661 rows containing missing values (geom_point).
## Warning: Removed 67 rows containing missing values (geom_point).
## Warning: Removed 186 rows containing missing values (geom_point).
## Warning: Removed 196 rows containing missing values (geom_point).
## Warning: Removed 1657 rows containing missing values (geom_point).
## Warning: Removed 426 rows containing missing values (geom_point).
## Warning: Removed 208 rows containing missing values (geom_point).
## Warning: Removed 95 rows containing missing values (geom_point).
## Warning: Removed 113 rows containing missing values (geom_point).
## Warning: Removed 411 rows containing missing values (geom_point).
## Warning: Removed 580 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 971 rows containing missing values (geom_point).
## Warning: Removed 603 rows containing missing values (geom_point).
## Warning: Removed 822 rows containing missing values (geom_point).
## Warning: Removed 112 rows containing missing values (geom_point).
## Warning: Removed 109 rows containing missing values (geom_point).
## Warning: Removed 645 rows containing missing values (geom_point).
## Warning: Removed 52 rows containing missing values (geom_point).
## Warning: Removed 119 rows containing missing values (geom_point).
## Warning: Removed 108 rows containing missing values (geom_point).
## Warning: Removed 372 rows containing missing values (geom_point).
## Warning: Removed 141 rows containing missing values (geom_point).
## Warning: Removed 66 rows containing missing values (geom_point).
## Warning: Removed 885 rows containing missing values (geom_point).
## Warning: Removed 824 rows containing missing values (geom_point).
## Warning: Removed 239 rows containing missing values (geom_point).
## Warning: Removed 603 rows containing missing values (geom_point).
## Warning: Removed 294 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_point).
## Warning: Removed 128 rows containing missing values (geom_point).
## Warning: Removed 199 rows containing missing values (geom_point).
## Warning: Removed 1208 rows containing missing values (geom_point).
## Warning: Removed 355 rows containing missing values (geom_point).
## Warning: Removed 499 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 895 rows containing missing values (geom_point).
## Warning: Removed 321 rows containing missing values (geom_point).
## Warning: Removed 81 rows containing missing values (geom_point).
## Warning: Removed 27 rows containing missing values (geom_point).

# Estimated loss (x) against estimated return (y), colored by the original
# loan amount, with one panel per income range.
ggplot(loans, aes(EstimatedLoss, EstimatedReturn)) +
  geom_point(aes(colour = LoanOriginalAmount)) +
  facet_wrap(~ IncomeRange) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 576 rows containing missing values (geom_point).
## Warning: Removed 2620 rows containing missing values (geom_point).
## Warning: Removed 2132 rows containing missing values (geom_point).
## Warning: Removed 8017 rows containing missing values (geom_point).
## Warning: Removed 5423 rows containing missing values (geom_point).
## Warning: Removed 2418 rows containing missing values (geom_point).
## Warning: Removed 7741 rows containing missing values (geom_point).
## Warning: Removed 157 rows containing missing values (geom_point).

# Estimated loss (x) against estimated return (y), one panel per ProsperScore
# value; point colour encodes the original loan amount.
ggplot(loans, aes(x = EstimatedLoss, y = EstimatedReturn)) +
  geom_point(aes(colour = LoanOriginalAmount)) +
  facet_wrap(~ ProsperScore) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 29084 rows containing missing values (geom_point).

###


# CreditScoreRangeLower (x) against original loan amount (y) for all loans;
# point colour encodes the number of investors.
ggplot(loans, aes(x = CreditScoreRangeLower, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

# CreditScoreRangeLower (x) against original loan amount (y), one panel per
# BorrowerState; point colour encodes the number of investors.
ggplot(loans, aes(x = CreditScoreRangeLower, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ BorrowerState) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

# CreditScoreRangeLower (x) against original loan amount (y), one panel per
# Occupation; point colour encodes the number of investors.
ggplot(loans, aes(x = CreditScoreRangeLower, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ Occupation) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 589 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

# CreditScoreRangeLower (x) against original loan amount (y), one panel per
# IncomeRange level; point colour encodes the number of investors.
ggplot(loans, aes(x = CreditScoreRangeLower, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ IncomeRange) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

# CreditScoreRangeLower (x) against original loan amount (y), one panel per
# ProsperScore value; point colour encodes the number of investors.
ggplot(loans, aes(x = CreditScoreRangeLower, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ ProsperScore) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

###

# Original loan amount (y) over listing creation date (x) for all loans;
# point colour encodes the number of investors.
ggplot(loans, aes(x = ListingCreationDate, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  ggtitle("Loan amounts by date") +
  xlab("Date") +
  ylab("Loan amount")

# Original loan amount (y) over listing creation date (x), one panel per
# ProsperScore value; point colour encodes the number of investors.
ggplot(loans, aes(x = ListingCreationDate, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ ProsperScore) +
  ggtitle("Loan amounts by date") +
  xlab("Date") +
  ylab("Loan amount")

# Original loan amount (y) over listing creation date (x), one panel per
# IncomeRange level; point colour encodes the number of investors.
ggplot(loans, aes(x = ListingCreationDate, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ IncomeRange) +
  ggtitle("Loan amounts by date") +
  xlab("Date") +
  ylab("Loan amount")

# Original loan amount (y) over listing creation date (x), one panel per
# Occupation; point colour encodes the number of investors.
ggplot(loans, aes(x = ListingCreationDate, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ Occupation) +
  ggtitle("Loan amounts by date") +
  xlab("Date") +
  ylab("Loan amount")

# Original loan amount (y) over listing creation date (x), one panel per
# BorrowerState; point colour encodes the number of investors.
ggplot(loans, aes(x = ListingCreationDate, y = LoanOriginalAmount)) +
  geom_point(aes(colour = Investors)) +
  facet_wrap(~ BorrowerState) +
  ggtitle("Loan amounts by date") +
  xlab("Date") +
  ylab("Loan amount")

###


# Original loan amount (x) against amount delinquent (y) for all loans;
# point colour encodes the estimated return.
ggplot(loans, aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(aes(colour = EstimatedReturn)) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 7622 rows containing missing values (geom_point).

# Original loan amount (x) against amount delinquent (y), one panel per
# BorrowerState; point colour encodes the estimated return.
ggplot(loans, aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(aes(colour = EstimatedReturn)) +
  facet_wrap(~ BorrowerState) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 3778 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 96 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 95 rows containing missing values (geom_point).
## Warning: Removed 708 rows containing missing values (geom_point).
## Warning: Removed 65 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 172 rows containing missing values (geom_point).
## Warning: Removed 318 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 221 rows containing missing values (geom_point).
## Warning: Removed 68 rows containing missing values (geom_point).
## Warning: Removed 40 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 29 rows containing missing values (geom_point).
## Warning: Removed 62 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 178 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 149 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 91 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 45 rows containing missing values (geom_point).
## Warning: Removed 25 rows containing missing values (geom_point).
## Warning: Removed 137 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 106 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 636 rows containing missing values (geom_point).
## Warning: Removed 53 rows containing missing values (geom_point).
## Warning: Removed 60 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 138 rows containing missing values (geom_point).
## Warning: Removed 47 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

# Original loan amount (x) against amount delinquent (y), one panel per
# Occupation; point colour encodes the estimated return.
ggplot(loans, aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(aes(colour = EstimatedReturn)) +
  facet_wrap(~ Occupation) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 2255 rows containing missing values (geom_point).
## Warning: Removed 114 rows containing missing values (geom_point).
## Warning: Removed 181 rows containing missing values (geom_point).
## Warning: Removed 155 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 301 rows containing missing values (geom_point).
## Warning: Removed 262 rows containing missing values (geom_point).
## Warning: Removed 91 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 38 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_point).
## Warning: Removed 160 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 51 rows containing missing values (geom_point).
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 85 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 41 rows containing missing values (geom_point).
## Warning: Removed 88 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 25 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 48 rows containing missing values (geom_point).
## Warning: Removed 1537 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 49 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 591 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 62 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 128 rows containing missing values (geom_point).
## Warning: Removed 258 rows containing missing values (geom_point).
## Warning: Removed 156 rows containing missing values (geom_point).
## Warning: Removed 19 rows containing missing values (geom_point).
## Warning: Removed 102 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 146 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 23 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_point).

# Original loan amount (x) against amount delinquent (y), one panel per
# ProsperScore value; point colour encodes the estimated return.
ggplot(loans, aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(aes(colour = EstimatedReturn)) +
  facet_wrap(~ ProsperScore) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 7622 rows containing missing values (geom_point).

# Original loan amount (x) against amount delinquent (y), one panel per
# IncomeRange level; point colour encodes the estimated return.
ggplot(loans, aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(aes(colour = EstimatedReturn)) +
  facet_wrap(~ IncomeRange) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 7602 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

###

# Mean original loan amount per listing year, drawn as a line. Each yearly
# point is sized by the summary table's `count` column, which the legend
# labels "Number of loans".
ggplot(loans.summary_by_year,
       aes(x = ListingCreationYear, y = mean_loanOriginalAmount)) +
  geom_line() +
  geom_point(aes(size = count)) +
  scale_size(name = "Number of loans") +
  ggtitle("Average loan amounts by year") +
  xlab("Year") +
  ylab("Average loan amount")

# Mean amount delinquent per listing year, drawn as a line. Each yearly
# point is sized by the summary table's `count` column, which the legend
# labels "Number of loans".
ggplot(loans.summary_by_year,
       aes(x = ListingCreationYear, y = mean_amountDelinquent)) +
  geom_line() +
  geom_point(aes(size = count)) +
  scale_size(name = "Number of loans") +
  ggtitle("Average amount delinquent by year") +
  xlab("Year") +
  ylab("Average delinquent amount")
## Warning: Removed 4 rows containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).

# Mean number of investors per listing year, drawn as a line. Each yearly
# point is sized by the summary table's `count` column, which the legend
# labels "Number of loans".
ggplot(loans.summary_by_year,
       aes(x = ListingCreationYear, y = mean_investors)) +
  geom_line() +
  geom_point(aes(size = count)) +
  scale_size(name = "Number of loans") +
  ggtitle("Average number of investors by year") +
  xlab("Year") +
  ylab("Average number of investors")

# Mean Prosper score per listing year, drawn as a line. Each yearly point is
# sized by the summary table's `count` column, which the legend labels
# "Number of loans".
ggplot(loans.summary_by_year,
       aes(x = ListingCreationYear, y = mean_prosperScore)) +
  geom_line() +
  geom_point(aes(size = count)) +
  scale_size(name = "Number of loans") +
  ggtitle("Average Prosper score by year") +
  xlab("Year") +
  ylab("Average prosper score")
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

# Mean estimated loss per listing year, drawn as a line. Each yearly point is
# sized by the summary table's `count` column, which the legend labels
# "Number of loans".
ggplot(loans.summary_by_year,
       aes(x = ListingCreationYear, y = mean_estimatedLoss)) +
  geom_line() +
  geom_point(aes(size = count)) +
  scale_size(name = "Number of loans") +
  ggtitle("Average estimated loss by year") +
  xlab("Year") +
  ylab("Average estimated loss")
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

# Mean estimated return per listing year, drawn as a line. Each yearly point
# is sized by the summary table's `count` column, which the legend labels
# "Number of loans".
ggplot(loans.summary_by_year,
       aes(x = ListingCreationYear, y = mean_estimatedReturn)) +
  geom_line() +
  geom_point(aes(size = count)) +
  scale_size(name = "Number of loans") +
  # The title names the plotted measure (estimated return), matching the
  # y-axis label.
  ggtitle("Average estimated return by year") +
  xlab("Year") +
  ylab("Average estimated return")
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).